Â
List of sections:
1. Load data and perform exclusions
# Load required packages
library(readxl)
library(tidyr)
library(dplyr)
library(ggplot2)
library(smacof)
library(plotly)
library(MASS)
library(fitdistrplus)
library(extremevalues)
library(here)
library(patchwork)
library(scatterplot3d)
library(rgl)
library(grid)
library(gridExtra)
library(viridis)
library(shapes)
library(matrixStats)
library(shiny)
library(shinydashboard)
library(andrews)
library(pheatmap)
library(smacof)
library(reshape2)
# Set working directory
mds_dir <- here::here()
# Load data
df_full <- read.csv(file = paste0(mds_dir,"/MDS_ASC_data"), sep = "\t", header = TRUE, quote = "\"")
# Basic demographic stats before filtering
df_full %>%
summarise(
Number_of_Participants = n(),
Mean_Age = mean(Age, na.rm = TRUE),
SD_Age = sd(Age, na.rm = TRUE),
Male_Participants = sum(Gender == "Man"),
Female_Participants = sum(Gender == "Woman"),
Other_Participants = sum(Gender == "Other")
) %>%
print()
## Number_of_Participants Mean_Age SD_Age Male_Participants
## 1 785 30.10828 8.328384 412
## Female_Participants Other_Participants
## 1 359 14
## Perform exclusions according to the preregistered criteria
#(1) Average response time per dissimilarity rating below 2 seconds
#hist(df_full$time_per_subs, breaks = 100)
cat("Response time above 2s:", sum(df_full$time_per_subs < 2, na.rm = TRUE), "\n")
## Response time above 2s: 0
df_filtered <- filter(df_full, time_per_subs > 2)
#(2) Inaccurate responses to the two control questions
# Test 1
# hist(df_filtered$test1, breaks = 100, xlim = c(0,100), ylim = c(0,50))
cat("Uncorrect response to test1:", sum(df_filtered$test1 < 99, na.rm = TRUE), "\n")
## Uncorrect response to test1: 10
df_filtered <- filter(df_filtered, test1 >= 99)
# Test2
# hist(df_filtered$test2, breaks = 100, xlim = c(0,7), ylim = c(0,100))
cat("Uncorrect response to test2:", sum(df_filtered$test2 > 1, na.rm = TRUE), "\n")
## Uncorrect response to test2: 36
df_filtered <- filter(df_filtered, test2 <= 1) # liberal (2nd step)
# Recode ID variable
df_filtered$ID <- 1:length(df_filtered$ID)
# Basic demographic stats after filtering
df_filtered %>%
summarise(
Number_of_Participants = n(),
Male = sum(Gender == "Man"),
Female = sum(Gender == "Woman"),
Other = sum(Gender == "Other"),
Mean_Age = mean(Age, na.rm = TRUE),
SD_Age = sd(Age, na.rm = TRUE),
) %>%
print()
## Number_of_Participants Male Female Other Mean_Age SD_Age
## 1 739 385 340 14 29.82409 8.087188
colnames(df_filtered) <- gsub("X2CB", "2CB", colnames(df_filtered))
# Define substance codes
substance_codes <- c("Baseline", "Alc", "MJ", "MDMA", "Amf", "LSD", "Psy", "Mef", "Coc",
"Alp", "Ket", "DMT", "N2O", "DXM", "Cod", "Tra", "Her", "Salv",
"GHB", "Dat", "Ben", "2CB", "Diph")
2. Group comparisons using Procrustes Analysis
# Create subsets for procrustes analysis
df_Psychiatric <- df_filtered[df_filtered$EverDiagnosis == "Yes",]
df_NoPsychiatric <- df_filtered[df_filtered$EverDiagnosis == "No",]
df_Medication <- df_filtered[df_filtered$Medication == "Yes",]
df_NoMedication <- df_filtered[df_filtered$Medication == "No",]
df_Males <- df_filtered[df_filtered$Gender == "Man",]
df_Females <- df_filtered[df_filtered$Gender == "Woman",]
# Create all combinations of substance codes
combinations <- expand.grid(Var1 = substance_codes, Var2 = substance_codes, stringsAsFactors = FALSE)
combinations <- subset(combinations, Var1 != Var2)
combinations <- subset(combinations, Var1 < Var2)
column_names <- apply(combinations, 1, function(x) paste(x, collapse = "_"))
# Define subsets for analysis
subsets_names <- c("df_Psychiatric", "df_NoPsychiatric", "df_Medication", "df_NoMedication", "df_Males", "df_Females")
# Loop through each subset
for (subset_name in subsets_names) {
subset <- get(subset_name)
# Process each subject's data
for (i in 1:nrow(subset)) {
subject_data <- subset[i, ]
subject_df <- data.frame(matrix(NA, nrow = length(substance_codes), ncol = length(substance_codes), dimnames = list(substance_codes, substance_codes)), check.names = FALSE)
# Fill the subject's data frame
for (code1 in substance_codes) {
for (code2 in substance_codes) {
col_name1 <- paste(code1, code2, sep = "")
col_name2 <- paste(code2, code1, sep = "")
if (col_name1 != col_name2) {
if (!is.na(subject_data[[col_name1]])) {
subject_df[code1, code2] <- subject_data[[col_name1]]
subject_df[code2, code1] <- subject_data[[col_name1]]
} else if (!is.na(subject_data[[col_name2]])) {
subject_df[code1, code2] <- subject_data[[col_name2]]
subject_df[code2, code1] <- subject_data[[col_name2]]
}
}
}
}
# Assign the subject's data frame to the global environment
assign(paste("df_", i, sep = ""), subject_df, envir = .GlobalEnv)
}
# Initialize matrices for calculations
comparisons_n <- matrix(0, nrow = length(substance_codes), ncol = length(substance_codes))
dm_average <- comparisons_n
# Calculate average values and number of comparisons
for (i in 1:nrow(subset)) {
df_name <- paste("df_", i, sep = "")
df <- get(df_name)
comparisons_n[!is.na(df)] <- comparisons_n[!is.na(df)] + 1
dm_average[!is.na(df)] <- dm_average[!is.na(df)] + as.numeric(df[!is.na(df)])
}
dm_average <- dm_average / comparisons_n
colnames(dm_average) <- substance_codes
rownames(dm_average) <- substance_codes
# Remove specific substances from the analysis
dm_average <- dm_average[!(rownames(dm_average) %in% c("Diph", "Dat", "Ben")), ]
dm_average <- dm_average[, !(colnames(dm_average) %in% c("Diph", "Dat", "Ben"))]
# Assign the result to a new variable in the global environment
dt_name <- paste("dt_", substr(subset_name, 4, nchar(subset_name)), sep = "")
assign(dt_name, dm_average, envir = .GlobalEnv)
}
# Remove temporary subject data frames
variables <- ls()
vars_to_remove <- variables[grep("^df_[0-9]+$", variables)]
rm(list = vars_to_remove)
# Perform Multidimensional Scaling (MDS) for each group
fit.Females <- mds(dt_Females, type = "ordinal", init = "torgerson") # default ndim = 2
fit.Males <- mds(dt_Males, type = "ordinal", init = "torgerson")
fit.Medication <- mds(dt_Medication, type = "ordinal", init = "torgerson")
fit.NoMedication <- mds(dt_NoMedication, type = "ordinal", init = "torgerson")
fit.Psychiatric <- mds(dt_Psychiatric, type = "ordinal", init = "torgerson")
fit.NoPsychiatric <- mds(dt_NoPsychiatric, type = "ordinal", init = "torgerson")
# Perform Procrustes analysis to compare groups
# Gender comparison
fit.proc_gender <- Procrustes(fit.Females$conf, fit.Males$conf)
fit.proc_gender
##
## Call: Procrustes(X = fit.Females$conf, Y = fit.Males$conf)
##
## Congruence coefficient: 0.985
## Alienation coefficient: 0.173
## Correlation coefficient: 0.965
##
## Rotation matrix:
## D1 D2
## D1 -0.789 0.614
## D2 0.614 0.789
##
## Translation vector: 0 0
## Dilation factor: 0.966
op <- par(mfrow = c(2,2))
plot(fit.Females, main = "MDS Females")
plot(fit.Males, main = "MDS Males")
plot(fit.proc_gender)
plot(fit.proc_gender, plot.type = "transplot", length = 0.05)
par(op)
# Medication comparison
fit.proc_medication <- Procrustes(fit.Medication$conf, fit.NoMedication$conf)
fit.proc_medication
##
## Call: Procrustes(X = fit.Medication$conf, Y = fit.NoMedication$conf)
##
## Congruence coefficient: 0.973
## Alienation coefficient: 0.229
## Correlation coefficient: 0.929
##
## Rotation matrix:
## D1 D2
## D1 -0.768 0.640
## D2 0.640 0.768
##
## Translation vector: 0 0
## Dilation factor: 0.926
op <- par(mfrow = c(2,2))
plot(fit.Medication, main = "MDS Medication")
plot(fit.NoMedication, main = "MDS No Medication")
plot(fit.proc_medication)
plot(fit.proc_medication, plot.type = "transplot", length = 0.05)
par(op)
# Psychiatric diagnosis compariso
fit.proc_diagnosis <- Procrustes(fit.Psychiatric$conf, fit.NoPsychiatric$conf)
fit.proc_diagnosis
##
## Call: Procrustes(X = fit.Psychiatric$conf, Y = fit.NoPsychiatric$conf)
##
## Congruence coefficient: 0.984
## Alienation coefficient: 0.178
## Correlation coefficient: 0.959
##
## Rotation matrix:
## D1 D2
## D1 0.958 -0.286
## D2 0.286 0.958
##
## Translation vector: 0 0
## Dilation factor: 0.958
op <- par(mfrow = c(2,2))
plot(fit.Psychiatric, main = "MDS Psychiatric diagnosis")
plot(fit.NoPsychiatric, main = "MDS No Psychiatric diagnosis")
plot(fit.proc_diagnosis)
plot(fit.proc_diagnosis, plot.type = "transplot", length = 0.05)
par(op)
3. Create individual and aggregated dissimilarity matrices
# Create all combinations of substance codes
combinations <- expand.grid(Var1 = substance_codes, Var2 = substance_codes, stringsAsFactors = FALSE)
combinations <- subset(combinations, Var1 != Var2)
combinations <- subset(combinations, Var1 < Var2)
column_names <- apply(combinations, 1, function(x) paste(x, collapse = "_"))
# Loop through each subject's data
for (i in as.numeric(df_filtered$ID)) {
subject_id <- i
# Extract data for the current subject
subject_data <- df_filtered[df_filtered$ID == subject_id, ]
# Create a data frame with substance codes as both rows and columns
subject_df <- data.frame(matrix(NA, nrow = length(substance_codes), ncol = length(substance_codes),dimnames = list(substance_codes, substance_codes)))
colnames(subject_df) <- substance_codes
rownames(subject_df) <- substance_codes
# Populate the data frame with values from the corresponding columns
for (code1 in substance_codes) {
for (code2 in substance_codes) {
col_name1 <- paste(code1, code2, sep = "")
col_name2 <- paste(code2, code1, sep = "")
if (col_name1 != col_name2) {
if (!is.na(subject_data[[col_name1]])) {
subject_df[code1, code2] <- subject_data[[col_name1]]
subject_df[code2, code1] <- subject_data[[col_name1]]
} else if (!is.na(subject_data[[col_name2]])) {
subject_df[code1, code2] <- subject_data[[col_name2]]
subject_df[code2, code1] <- subject_data[[col_name2]]
}
}
}
}
# Assign the subject's data frame to the global environment with a unique name
assign(paste("df_", subject_id, sep = ""), subject_df, envir = .GlobalEnv)
}
# Remove redundant file
rm(subject_df)
rm(subject_data)
# Create a list of all individual data frames
list_df <- lapply(as.numeric(df_filtered$ID), function(i) {
df_name <- paste("df_", i, sep = "")
if (exists(df_name, envir = .GlobalEnv)) {
get(df_name)
} else {
NULL
}
})
## Derive averaged dissimilarity ratings and number of comparisons
# Initialization of empty arrays
comparisons_n <- matrix(0, nrow = ncol(list_df[[1]]), ncol = ncol(list_df[[1]]))
dt_23 <- comparisons_n
# Calculating average values and number of comparisons
for (i in 1:length(list_df)) {
df <- list_df[[i]]
comparisons_n[!is.na(df)] <- comparisons_n[!is.na(df)] + 1
dt_23[!is.na(df)] <- dt_23[!is.na(df)] + as.numeric(df[!is.na(df)])
}
dt_23 <- dt_23 / comparisons_n
# Adding names to rows and columns
colnames(dt_23) <- substance_codes
rownames(dt_23) <- substance_codes
colnames(comparisons_n) <- substance_codes
rownames(comparisons_n) <- substance_codes
# Save the final matrix
dt <- dt_23
rm(df)
# Remove states without the expected number of obtained ratings (Diphenidine,Datura,Benzydamine)
dt <- dt[!(rownames(dt) %in% c("Diph", "Dat","Ben")), ]
dt <- dt[, !(colnames(dt) %in% c("Diph", "Dat","Ben"))]
4. Encode exploratory variables into data frame
# Reduce individual DFs to only present substances
for (i in 1:length(list_df)) {
# Define the data frame name dynamically
df_name <- paste("df_", i, sep = "")
# Access the data frame using get() and the dynamic name
df_i <- get(df_name)
# Identify rows and columns with all NAs
rows_with_all_nas <- which(apply(is.na(df_i), 1, all))
cols_with_all_nas <- which(apply(is.na(df_i), 2, all))
# Remove rows and columns with all NAs
df_i <- df_i[-rows_with_all_nas, -cols_with_all_nas]
# Update the data frame in the global environment
assign(df_name, df_i)
}
# Replace NAs with 0 in individual reduced DFs
for (i in 1:length(list_df)) {
# Define the data frame name dynamically
df_name <- paste("df_", i, sep = "")
# Access the data frame using get() and the dynamic name
df_i <- get(df_name)
# Replace NA values with 0
df_i[is.na(df_i)] <- 0
# Update the data frame in the global environment
assign(df_name, df_i)
}
# Extract vectors with values for unique comparisons and save in a data frame
# Initialize an empty data frame for results
results_df <- data.frame(Comparison = character(0), stringsAsFactors = FALSE)
# Loop through unique substance code pairs
for (substance1 in substance_codes) {
for (substance2 in substance_codes) {
if (substance1 != substance2) { # Avoid self-comparisons
# Create a column name for the comparison
col_name <- paste(substance1, "_vs_", substance2, sep = "")
# Initialize a vector to store comparison values
comparison_values <- character(0)
# Loop through individual data frames
for (i in 1:length(list_df)) {
# Access the data frame by name
df_name <- paste("df_", i, sep = "")
df_i <- get(df_name)
# Check if substances exist in the data frame
if (substance1 %in% rownames(df_i) && substance2 %in% colnames(df_i)) {
# Extract the dissimilarity value
value <- df_i[substance1, substance2]
comparison_values <- c(comparison_values, value)
} else {
# Use NA if comparison doesn't exist
comparison_values <- c(comparison_values, NA)
}
}
# Create and add a row for this comparison
comparison_row <- data.frame(Comparison = col_name, t(comparison_values))
results_df <- rbind(results_df, comparison_row)
}
}
}
# Reduce symmetric rows (e.g. Alk_Kod, Kod_Alk)
# Initialize a new data frame for reduced results
results_df_reduced <- data.frame(Comparison = character(0), stringsAsFactors = FALSE)
# Track unique comparisons
unique_comparisons <- character(0)
# Loop through rows in the original results_df
for (i in 1:nrow(results_df)) {
current_comparison <- results_df$Comparison[i]
# Check if symmetric comparison exists
symmetric_comparison <- paste(rev(unlist(strsplit(current_comparison, "_vs_"))), collapse = "_vs_")
if (!(symmetric_comparison %in% unique_comparisons)) {
# Add new unique comparison
unique_comparisons <- c(unique_comparisons, current_comparison)
results_df_reduced <- rbind(results_df_reduced , results_df[i, ])
}
}
# View the reduced results data frame
View(results_df_reduced)
ds <- results_df_reduced
col_num <- ncol(ds) # original number of columns
# Convert columns to numeric
ds[, 2:col_num] <- sapply(ds[, 2:col_num], as.numeric)
# Calculate mean dissimilarity and variance
ds$mean_dissimilarity <- rowMeans(ds[2:col_num], na.rm = TRUE)
ds$variance <- apply(ds[2:col_num], 1, sd, na.rm = TRUE)
codes <- substance_codes
# Initialize data frame for substance-related variables
dss <- data.frame(state = codes, mean_dissimilarity = 0, variance = 0)
# Aggregate data for each substance code
for (code in codes) {
# Find rows in "ds" where the Comparison column contains the code
matching_rows <- ds[grep(code, ds$Comparison), ]
# Calculate mean and standard deviation, excluding NA and NaN
mean_value <- mean(matching_rows$mean_dissimilarity, na.rm = TRUE)
sd_value <- mean(matching_rows$variance, na.rm = TRUE)
# Update the aggregated data frame
dss[dss$state == code, "mean_dissimilarity"] <- mean_value
dss[dss$state == code, "variance"] <- sd_value
}
# Add dissimilarity to baseline variable
dsb <- ds[c(1:22),]
dsb$Comparison <- sub("Baseline_vs_", "", dsb$Comparison)
names(dsb)[names(dsb) == "Comparison"] <- "state"
# Calculate dissimilarity to baseline
dsb$dissimilarity_to_baseline <- rowMeans(dsb[2:col_num], na.rm = TRUE)
new_row <- data.frame(state = "Baseline", dissimilarity_to_baseline = 0)
dsb <- dsb[,c(1, 743)]
dsb <- rbind(dsb, new_row)
dss <- left_join(dss, dsb)
## Joining with `by = join_by(state)`
# Add confidence ratings
dc <- df_filtered
dc <- dc[, 538:560] # confidence ratings
colnames(dc) <- sub("Conf.*$", "", colnames(dc))
dc <- sapply(dc, as.numeric)
col_means <- colMeans(dc, na.rm = TRUE) # calculate mean confidence for each substance from all subjects
col_vars <- sqrt(colVars(dc, na.rm = TRUE))# calculate variance of confidence ratings
# Convert the result into a new row and add it to the data frame
dc <- rbind(dc, col_means, col_vars)
# Rename the new row to something meaningful, e.g., "ColumnMeans"
rownames(dc)[(nrow(dc))-1] <- "confidence"
rownames(dc)[nrow(dc)] <- "confidence_variance"
dc <- as.data.frame(dc)
# Reduce to essential variables
dc_confidence <- as.data.frame(dc[(nrow(dc) - 1), ])# extract mean confidence variable
dc_confidence$state <- rownames(dc_confidence)
dc_confidence_variance <- as.data.frame(dc[nrow(dc), ])
dc_confidence_variance$state <- rownames(dc_confidence_variance)
dc_confidence_variance <- gather(dc_confidence_variance, key = "state", value = "confidence_variance")
dc_confidence<- gather(dc_confidence, key = "state", value = "confidence")
# Merge data frames
dss <- left_join(dss, dc_confidence)
## Joining with `by = join_by(state)`
dss <- left_join(dss, dc_confidence_variance)
## Joining with `by = join_by(state)`
5. Analyze exploratory variables
dss <- dss[!(dss$state %in% c("Dat", "Ben", "Diph")), ]
point_col <- c(
Baseline = "#2B2B2B",
Alc = "#8A99BF", #Alcohol
MJ = "#327D43", #Marijuana
MDMA = "#7B3894",#MDMA
Amf = "#8B2B2B", #Amphetamine
LSD = "#5998BA", #LSD
Psy = "#5DADB3", #Psylocybine
Mef = "#A23E71", #Mephedrone
Coc = "#BF436E", #Cocaine
Alp = "#6B86B0", #Aplrazolam
Ket = "#505CB9", #Ketamine
DMT = "#108BB8", #DMT
N2O = "#6861C7", #Nitrous Oxide
DXM = "#7A65A8", #DXM
Cod = "#ACA232", #Codeine
Tra = "#AC845F", #Tramadol
Her = "#755B28", #Heroine/Morphine
Salv = "#AFA0BD", #Salvia Divinorum
GHB = "#617991", #GHB
`2CB` = "#6AA4BA" #2CB
)
shapiro.test(dss$confidence)
##
## Shapiro-Wilk normality test
##
## data: dss$confidence
## W = 0.9187, p-value = 0.09353
shapiro.test(dss$confidence_variance)
##
## Shapiro-Wilk normality test
##
## data: dss$confidence_variance
## W = 0.97073, p-value = 0.7701
shapiro.test(dss$variance)
##
## Shapiro-Wilk normality test
##
## data: dss$variance
## W = 0.95286, p-value = 0.4126
# Function to create a scatter plot with correlation info
create_scatter_plot <- function(data, x_var, y_var, x_label, y_label) {
# Calculate Pearson correlation
cor_test <- cor.test(data[[x_var]], data[[y_var]])
r <- round(cor_test$estimate, 3)
p_value <- cor_test$p.value
df <- cor_test$parameter
# Format p-value
if (p_value < 0.001) {
p_text <- "p < 0.001"
} else {
p_text <- paste("p =", format(p_value, digits = 3))
}
# Create the stats text
stats_text <- paste0("r(", df, ") = ", r, " ", p_text)
# Create the plot
ggplot(data, aes_string(x = x_var, y = y_var)) +
geom_point(color = "#3b528b", alpha = 0.6) +
geom_smooth(method = "lm", color = "#21918c", se = FALSE) +
labs(
x = x_label,
y = y_label
) +
annotate("text", x = Inf, y = Inf, label = stats_text,
hjust = 1, vjust = 1, size = 3, color = "black") +
theme_minimal() +
theme(axis.title = element_text(size = 10))
}
# Create the three plots
plot1 <- create_scatter_plot(dss, "variance", "confidence",
"Dissimilarity ratings (SD)", "Confidence")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot2 <- create_scatter_plot(dss, "variance", "confidence_variance",
"Dissimilarity ratings (SD)", "Confidence (SD)")
plot3 <- create_scatter_plot(dss, "confidence", "confidence_variance",
"Confidence", "Confidence (SD)")
# Combine the plots side by side
combined_plot <- plot1 + plot2 + plot3 +
plot_layout(ncol = 3) +
plot_annotation(
theme = theme(plot.margin = margin(t = 10, r = 10, b = 10, l = 10, unit = "pt"))
)
# Display the combined plot
print(combined_plot)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
 6. 4D Multidimensional
scaling
## Perform 3D Multidimensional Scaling
model4d <- mds(dt, ndim = 4, type = "ordinal", init = "torgerson")
mds_df <- data.frame(Dim1 = model4d$conf[,1],
Dim2 = model4d$conf[,2],
Dim3 = model4d$conf[,3],
Dim4 = model4d$conf[,4])
# Calculate and display stress value
stress_val = round(model4d$stress,4)
stress_val
## [1] 0.0529
labels <- colnames(dt)
## Plot 4D MDS model - with color as the 4th dimension
custom_colors <- colorRampPalette(c("deepskyblue3", "deeppink3"))(10)
#custom_colors <- colorRampPalette(c("black", "goldenrod3"))(10)
axis_limits = c(-0.8,0.8)
# Create an interactive 4D scatter plot with equally sized points
p <- plot_ly(
data = mds_df,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~Dim4, # Use Dim4 for color
colors = custom_colors, # Use the original custom_colors
type = 'scatter3d',
mode = 'markers')
# Add text labels next to the data points with black color
p <- p %>% add_text(
text = ~labels, ### OPTIONAL: labels_full (datapoints)
textposition = "bottom center",
showlegend = FALSE,
color = I("black"))
# Edit layout
p <- p %>% layout(
scene = list(
xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
aspectmode = "cube"
),
title = paste("4D MDS (Stress =", round(model4d$stress,3), ")"),
legend = list(
title = list(text = 'Dim4'), # Add legend title for Dim4
traceorder = "normal",
itemsizing = "constant"
)
)
# Print the plot
p
## Plot 4D MDS model - with color as the 1st dimension
custom_colors <- colorRampPalette(c("black", "cyan3"))(10)
p_alt <- plot_ly(
data = mds_df,
x = ~Dim2, y = ~Dim3, z = ~Dim4,
color = ~Dim1,
colors = custom_colors,
type = 'scatter3d',
mode = 'markers'
) %>%
add_markers() %>%
layout(
scene = list(
xaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
yaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
zaxis = list(title = 'Dim4', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
aspectmode = "cube"
),
title = "4D MDS Plot (Dim1 as color)",
legend = list(title = list(text = 'Dim1'))
) %>%
add_text(text = ~labels, textposition = "top right", showlegend = FALSE)
# Print the alternative plot
p_alt
## Warning: textfont.color doesn't (yet) support data arrays
## Warning: textfont.color doesn't (yet) support data arrays
## Create a sequence of plots to represent 4D MDS
# Function to create a 3D plot
create_3d_plot <- function(df, x_col, y_col, z_col, color_col, title) {
plot_ly(
data = df,
x = ~get(x_col), y = ~get(y_col), z = ~get(z_col),
color = ~get(color_col),
colors = colorRamp(c("blue", "red")),
type = 'scatter3d',
mode = 'markers+text',
marker = list(size = 5),
text = rownames(df),
textposition = "top center",
hoverinfo = 'text',
hovertext = ~paste(
rownames(df), "<br>",
x_col, ": ", signif(get(x_col), 3), "<br>",
y_col, ": ", signif(get(y_col), 3), "<br>",
z_col, ": ", signif(get(z_col), 3), "<br>",
color_col, ": ", signif(get(color_col), 3)
)
) %>%
layout(
scene = list(
aspectmode = "cube",
xaxis = list(title = x_col, titlefont = list(size = 14, family = "Arial", weight = "bold"), range = axis_limits),
yaxis = list(title = y_col, titlefont = list(size = 14, family = "Arial", weight = "bold"), range = axis_limits),
zaxis = list(title = z_col, titlefont = list(size = 14, family = "Arial", weight = "bold"), range = axis_limits)
),
title = title
)
}
# Create four 3D plots
p1 <- create_3d_plot(mds_df, "Dim1", "Dim2", "Dim3", "Dim4", "4D MDS: Dim1, Dim2, Dim3 (Color: Dim4)")
p1
## Warning: textfont.color doesn't (yet) support data arrays
## Warning: textfont.color doesn't (yet) support data arrays
p2 <- create_3d_plot(mds_df, "Dim1", "Dim2", "Dim4", "Dim3", "4D MDS: Dim1, Dim2, Dim4 (Color: Dim3)")
p2
## Warning: textfont.color doesn't (yet) support data arrays
## Warning: textfont.color doesn't (yet) support data arrays
p3 <- create_3d_plot(mds_df, "Dim1", "Dim3", "Dim4", "Dim2", "4D MDS: Dim1, Dim3, Dim4 (Color: Dim2)")
p3
## Warning: textfont.color doesn't (yet) support data arrays
## Warning: textfont.color doesn't (yet) support data arrays
p4 <- create_3d_plot(mds_df, "Dim2", "Dim3", "Dim4", "Dim1", "4D MDS: Dim2, Dim3, Dim4 (Color: Dim1)")
p4
## Warning: textfont.color doesn't (yet) support data arrays
## Warning: textfont.color doesn't (yet) support data arrays
## OPTIONAL: Create interactive SHINY app plot to explore the data
# plot_4d_mds <- function(df, x_col = "Dim1", y_col = "Dim2", z_col = "Dim3", color_col = "Dim4") {
# plot_ly(
# data = df,
# x = ~get(x_col), y = ~get(y_col), z = ~get(z_col),
# color = ~get(color_col),
# colors = colorRamp(c("blue", "red")),
# type = 'scatter3d',
# mode = 'markers+text',
# marker = list(size = 5),
# text = rownames(df),
# textposition = "top center",
# hoverinfo = 'text',
# hovertext = ~paste(
# rownames(df), "<br>",
# "Dim1:", signif(Dim1, 3), "<br>",
# "Dim2:", signif(Dim2, 3), "<br>",
# "Dim3:", signif(Dim3, 3), "<br>",
# "Dim4:", signif(Dim4, 3)
# )
# ) %>%
# layout(
# scene = list(
# aspectmode = "cube",
# xaxis = list(title = x_col),
# yaxis = list(title = y_col),
# zaxis = list(title = z_col)
# ),
# title = paste("4D MDS Visualization (", x_col, y_col, z_col, "color:", color_col, ")")
# )
# }
# p <- plot_4d_mds(mds_df)
# print(p)
#
# ui <- dashboardPage(
# dashboardHeader(title = "4D MDS Visualization"),
# dashboardSidebar(
# selectInput("x_axis", "X-axis", choices = c("Dim1", "Dim2", "Dim3", "Dim4")),
# selectInput("y_axis", "Y-axis", choices = c("Dim1", "Dim2", "Dim3", "Dim4")),
# selectInput("z_axis", "Z-axis", choices = c("Dim1", "Dim2", "Dim3", "Dim4")),
# selectInput("color", "Color", choices = c("Dim1", "Dim2", "Dim3", "Dim4"))
# ),
# dashboardBody(
# plotlyOutput("mds_plot")
# )
# )
#
# server <- function(input, output) {
# output$mds_plot <- renderPlotly({
# plot_4d_mds(mds_df, input$x_axis, input$y_axis, input$z_axis, input$color)
# })
# }
#
# shinyApp(ui, server)
7. Heatmaps for high-dimensional data
# Function to create and print heatmap
create_heatmap <- function(mds_df, dim) {
heatmap_plot <- pheatmap(as.matrix(mds_df),
main = paste(dim, "D MDS - Heatmap with Clustering for Different States"),
show_rownames = TRUE,
show_colnames = TRUE,
color = colorRampPalette(c("navy", "white", "deeppink4"))(50),
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
clustering_method = "complete",
border_color = NA,
fontsize = 10,
fontsize_row = 8,
fontsize_col = 8)
print(heatmap_plot)
}
# 4D Model
model4d <- mds(dt, ndim = 4, type = "ordinal", init = "torgerson")
mds_df_4d <- data.frame(Dim1 = model4d$conf[,1],
Dim2 = model4d$conf[,2],
Dim3 = model4d$conf[,3],
Dim4 = model4d$conf[,4])
create_heatmap(mds_df_4d, "4")
# 6D Model
model6d <- mds(dt, ndim = 6, type = "ordinal", init = "torgerson")
mds_df_6d <- data.frame(Dim1 = model6d$conf[,1],
Dim2 = model6d$conf[,2],
Dim3 = model6d$conf[,3],
Dim4 = model6d$conf[,4],
Dim5 = model6d$conf[,5],
Dim6 = model6d$conf[,6])
create_heatmap(mds_df_6d, "6")
# 8D Model
model8d <- mds(dt, ndim = 8, type = "ordinal", init = "torgerson")
mds_df_8d <- data.frame(Dim1 = model8d$conf[,1],
Dim2 = model8d$conf[,2],
Dim3 = model8d$conf[,3],
Dim4 = model8d$conf[,4],
Dim5 = model8d$conf[,5],
Dim6 = model8d$conf[,6],
Dim7 = model8d$conf[,7],
Dim8 = model8d$conf[,8])
create_heatmap(mds_df_8d, "8")
# export 4 x 5 cm
8. Andrews curves for high-dimensional data
# Define the grouping with colors in the correct order
group_col <- c(
Baseline = "#2B2B2B",
Depressants = "#8A99BF",
Opioids = "#ACA232",
Stimulants = "#A23E71",
MDMA = "#7B3894",
Marijuana = "#327D43",
Psychedelics = "#5998BA",
Dissociatives = "#505CB9"
)
# Function to create semi-transparent colors
add_alpha <- function(col, alpha=0.1) {
rgb(t(col2rgb(col)), alpha=alpha*255, maxColorValue=255)
}
# Create a vector of line types for individual substances
line_types <- c("solid", "dotdash", "solid", "solid", "longdash",
"solid", "solid", "longdash", "longdash", "dotdash",
"solid", "solid", "solid", "solid", "dotdash",
"dotdash", "dotdash", "solid", "dotdash", "solid")
# Function to plot Andrews curves for individual substances
plot_andrews_individual <- function(mds_df, dim) {
color_vector <- point_col[rownames(mds_df)]
transparent_color_vector <- sapply(color_vector, add_alpha)
andrews(as.matrix(mds_df),
ymax = 4,
main = paste(dim, "D MDS - Andrews Curves for Individual Substances"),
clr = transparent_color_vector,
lwd = 2,
step = 200,
lty = line_types)
legend("topright",
legend = rownames(mds_df),
col = color_vector,
lty = line_types,
cex = 0.7,
xpd = TRUE,
inset = c(-0.1, 0),
title = "Substances")
}
# Function to create grouped data
create_grouped_data <- function(mds_df) {
data.frame(
Baseline = colMeans(mds_df["Baseline", , drop = FALSE]),
Depressants = colMeans(mds_df[c("Alc", "Alp", "GHB"), ]),
Opioids = colMeans(mds_df[c("Tra", "Cod", "Her"), ]),
Stimulants = colMeans(mds_df[c("Coc", "Mef", "Amf"), ]),
MDMA = colMeans(mds_df["MDMA", , drop = FALSE]),
Marijuana = colMeans(mds_df["MJ", , drop = FALSE]),
Psychedelics = colMeans(mds_df[c("LSD", "Psy", "DMT", "2CB"), ]),
Dissociatives = colMeans(mds_df[c("Ket", "N2O", "DXM", "Salv"), ])
)
}
# Function to plot Andrews curves for grouped substances
plot_andrews_grouped <- function(grouped_mds, dim) {
andrews(t(grouped_mds),
ymax = 4,
main = paste(dim, "D MDS - Andrews Curves for Grouped Substances"),
clr = group_col,
lwd = 2,
step = 200,
lty = c("solid","longdash","longdash","solid","solid","dashed","solid","solid"))
legend("topright",
legend = names(group_col),
col = group_col,
lty = c("solid","longdash","longdash","solid","solid","dashed","solid","solid"),
lwd = 2,
cex = 0.7,
xpd = TRUE,
inset = c(-0.2, 0),
title = "Substance Groups")
}
# 4D Model
par(mfrow = c(1, 1), mar = c(5, 4, 4, 10) + 0.1)
plot_andrews_individual(mds_df_4d, "4")
plot_andrews_grouped(create_grouped_data(mds_df_4d), "4")
# 6D Model
par(mfrow = c(1, 1), mar = c(5, 4, 4, 10) + 0.1)
plot_andrews_individual(mds_df_6d, "6")
plot_andrews_grouped(create_grouped_data(mds_df_6d), "6")
# 8D Model
par(mfrow = c(1, 1), mar = c(5, 4, 4, 10) + 0.1)
plot_andrews_individual(mds_df_8d, "8")
plot_andrews_grouped(create_grouped_data(mds_df_8d), "8")
# Interpretation of Andrews Curves:
# - Each dimension contributes to the entire curve shape, not just specific points.
# - x1 affects the overall vertical shift of the curve.
# - x2 and x3 contribute to the primary sinusoidal shape.
# - x4 and higher dimensions add higher frequency components to the curve.
# - Similar curve shapes suggest similarity in the multidimensional space.
# - Differences in curve shapes, especially at peaks and troughs, indicate differences in the space.
# - This visualization allows you to compare the entire multidimensional structure of each point (state) in a 2D plot.
# - The power of Andrews curves is in showing overall similarities and differences between multidimensional points, not in isolating individual dimensions.
# - Grouped curves show average characteristics of substance classes, potentially revealing broader patterns.
# - Comparing individual and grouped plots can highlight which substances are typical or atypical for their class.
9. Subspaces for substance classes
# Updated color scheme
point_col <- c(
Baseline = "#2B2B2B", Alc = "#8A99BF", MJ = "#327D43", MDMA = "#7B3894",
Amf = "#8B2B2B", LSD = "#5998BA", Psy = "#5DADB3", Mef = "#A23E71",
Coc = "#BF436E", Alp = "#6B86B0", Ket = "#505CB9", DMT = "#108BB8",
N2O = "#6861C7", DXM = "#7A65A8", Cod = "#ACA232", Tra = "#AC845F",
Her = "#755B28", Salv = "#AFA0BD", GHB = "#617991", `2CB` = "#6AA4BA"
)
# Calculate overall x and y axis limits for all datasets
get_axis_limits <- function(...) {
datasets <- list(...)
all_coords <- do.call(rbind, lapply(datasets, function(model) as.data.frame(model$conf)))
xlim <- range(all_coords$D1)
ylim <- range(all_coords$D2)
return(list(xlim = xlim, ylim = ylim))
}
# Function to create 2D MDS plot with fixed axis limits
create_2d_mds_plot <- function(model, title, axis_limits) {
coords <- as.data.frame(model$conf)
states <- rownames(coords)
sizes <- sqrt(model$spp)
sizes <- ((sizes - min(sizes)) / (max(sizes) - min(sizes))) * (9 - 4) + 4
p <- ggplot(coords, aes(x = D1, y = D2)) +
geom_point(color = alpha(point_col[states], 0.5), size = sizes) +
geom_point(color = point_col[states], size = 2) +
geom_text(aes(label = states), hjust = -0.5, vjust = -0.5, size = 3) +
labs(title = title, x = "Dimension 1", y = "Dimension 2") +
coord_fixed(xlim = axis_limits$xlim, ylim = axis_limits$ylim) +
theme_minimal() +
theme(legend.position = "none")
return(p)
}
# Manually extracting subsets for each subgroup
dt_set_stimulants_and_depressants <- dt[rownames(dt) %in% c("Amf", "Mef", "Coc", "Alc", "Alp", "Her", "Tra", "Cod", "GHB", "Baseline"), ]
dt_set_stimulants_and_depressants <- dt_set_stimulants_and_depressants[, colnames(dt_set_stimulants_and_depressants) %in% c("Amf", "Mef", "Coc", "Alc", "Alp", "Her", "Tra", "Cod", "GHB", "Baseline")]
dt_set_stimulants_and_depressants_with_mdma_mj <- dt[rownames(dt) %in% c("Amf", "Mef", "Coc", "Alc", "Alp", "Her", "Tra", "Cod", "GHB", "Baseline", "MDMA", "MJ"), ]
dt_set_stimulants_and_depressants_with_mdma_mj <- dt_set_stimulants_and_depressants_with_mdma_mj[, colnames(dt_set_stimulants_and_depressants_with_mdma_mj) %in% c("Amf", "Mef", "Coc", "Alc", "Alp", "Her", "Tra", "Cod", "GHB", "Baseline", "MDMA", "MJ")]
dt_set_psychedelics_and_dissociatives <- dt[rownames(dt) %in% c("LSD", "Psy", "2CB", "DMT", "Salv", "Ket", "DXM", "N2O"), ]
dt_set_psychedelics_and_dissociatives <- dt_set_psychedelics_and_dissociatives[, colnames(dt_set_psychedelics_and_dissociatives) %in% c("LSD", "Psy", "2CB", "DMT", "Salv", "Ket", "DXM", "N2O")]
dt_set_psychedelics_and_dissociatives_with_mdma_mj <- dt[rownames(dt) %in% c("LSD", "Psy", "2CB", "DMT", "Salv", "Ket", "DXM", "N2O", "MDMA", "MJ"), ]
dt_set_psychedelics_and_dissociatives_with_mdma_mj <- dt_set_psychedelics_and_dissociatives_with_mdma_mj[, colnames(dt_set_psychedelics_and_dissociatives_with_mdma_mj) %in% c("LSD", "Psy", "2CB", "DMT", "Salv", "Ket", "DXM", "N2O", "MDMA", "MJ")]
dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline <- dt[rownames(dt) %in% c("LSD", "Psy", "2CB", "DMT", "Salv", "Ket", "DXM", "N2O", "MDMA", "MJ", "Baseline"), ]
dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline <- dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline[, colnames(dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline) %in% c("LSD", "Psy", "2CB", "DMT", "Salv", "Ket", "DXM", "N2O", "MDMA", "MJ", "Baseline")]
# Perform MDS for all datasets (2D and 3D separately)
m2o_sd <- mds(dt_set_stimulants_and_depressants, ndim = 2, type = "ordinal", init = "torgerson")
m2o_sd_mdma_mj <- mds(dt_set_stimulants_and_depressants_with_mdma_mj, ndim = 2, type = "ordinal", init = "torgerson")
m2o_pd <- mds(dt_set_psychedelics_and_dissociatives, ndim = 2, type = "ordinal", init = "torgerson")
m2o_pd_mdma_mj <- mds(dt_set_psychedelics_and_dissociatives_with_mdma_mj, ndim = 2, type = "ordinal", init = "torgerson")
m2o_pd_mdma_mj_baseline <- mds(dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline, ndim = 2, type = "ordinal", init = "torgerson")
# Get axis limits for all 2D plots
axis_limits <- get_axis_limits(m2o_sd, m2o_sd_mdma_mj, m2o_pd, m2o_pd_mdma_mj, m2o_pd_mdma_mj_baseline)
# Plot 1: Stimulants and Depressants
p1_sd <- create_2d_mds_plot(
m2o_sd,
paste("Stimulants and Depressants \n (Stress =", round(m2o_sd$stress, 3), ")"),
axis_limits)
# Plot 2: Stimulants and Depressants with MDMA & Marijuana
p2_sd_mdma_mj <- create_2d_mds_plot(
m2o_sd_mdma_mj,
paste("Stimulants and Depressants with MDMA & Marijuana \n (Stress =", round(m2o_sd_mdma_mj$stress, 3), ")"),
axis_limits)
# Plot 3: Psychedelics and Dissociatives
p3_pd <- create_2d_mds_plot(
m2o_pd,
paste("Psychedelics and Dissociatives \n (Stress =", round(m2o_pd$stress, 3), ")"),
axis_limits)
# Plot 4: Psychedelics and Dissociatives with MDMA & Marijuana
p4_pd_mdma_mj <- create_2d_mds_plot(
m2o_pd_mdma_mj,
paste("Psychedelics and Dissociatives with MDMA & Marijuana \n (Stress =", round(m2o_pd_mdma_mj$stress, 3), ")"), axis_limits)
# Plot 5: Psychedelics and Dissociatives with MDMA, Marijuana, & Baseline
p5_pd_mdma_mj_baseline <- create_2d_mds_plot(
m2o_pd_mdma_mj_baseline,
paste("Psychedelics and Dissociatives with MDMA, Marijuana, & Baseline \n (Stress =", round(m2o_pd_mdma_mj_baseline$stress, 3), ")"), axis_limits )
#### Display the plots in order (2D first, then 3D)
print(p1_sd)
## Perform 3D Multidimensional Scaling using dt_set_stimulants_and_depressants
model3d_stimulants_and_depressants <- mds(dt_set_stimulants_and_depressants, ndim = 3, type = "ordinal", init = "torgerson")
# Calculate and display stress value
stress_val_stimulants_and_depressants <- round(model3d_stimulants_and_depressants$stress, 4)
stress_val_stimulants_and_depressants
## [1] 0.0096
### Create an interactive 3D scatter plot with stress-wise sizing of individual points
sizes_o_stimulants_and_depressants <- model3d_stimulants_and_depressants$spp
# Apply square-root transformation to point sizes
sizes_o_stimulants_and_depressants <- sqrt(sizes_o_stimulants_and_depressants)
# Normalize point sizes for better visibility
min_range <- 6
max_range <- 16
sizes_o_stimulants_and_depressants <- ((sizes_o_stimulants_and_depressants - min(sizes_o_stimulants_and_depressants)) /
(max(sizes_o_stimulants_and_depressants) - min(sizes_o_stimulants_and_depressants))) *
(max_range - min_range) + min_range
# Create data frame with dimensions and sizes
labels_stimulants_and_depressants <- rownames(model3d_stimulants_and_depressants$conf)
# Ensure that the sizes_o is properly matched with the correct labels
# This aligns only the relevant subset of point_col with sizes_o
sizes_o_stimulants_and_depressants <- sizes_o_stimulants_and_depressants[match(labels_stimulants_and_depressants, labels_stimulants_and_depressants)]
# Now create the plot using plotly
mds_df_stimulants_and_depressants <- data.frame(
Dim1 = model3d_stimulants_and_depressants$conf[, 1],
Dim2 = model3d_stimulants_and_depressants$conf[, 2],
Dim3 = model3d_stimulants_and_depressants$conf[, 3],
labels = labels_stimulants_and_depressants,
sizes = sizes_o_stimulants_and_depressants,
labels_factor = factor(labels_stimulants_and_depressants, levels = labels_stimulants_and_depressants)
)
# Plot the 3D scatter plot using plotly
p_stimulants_and_depressants <- plot_ly()
p_stimulants_and_depressants <- p_stimulants_and_depressants %>% add_trace(
data = mds_df_stimulants_and_depressants,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_stimulants_and_depressants], # Apply colors only to the subset
type = 'scatter3d',
mode = 'markers',
marker = list(size = ~sizes, sizemode = 'diameter', opacity = 0.3),
showlegend = FALSE
)
p_stimulants_and_depressants <- p_stimulants_and_depressants %>% add_trace(
data = mds_df_stimulants_and_depressants,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_stimulants_and_depressants],
type = 'scatter3d',
mode = 'markers',
marker = list(size = 5, sizemode = 'diameter'),
name = ~labels_factor
)
p_stimulants_and_depressants <- p_stimulants_and_depressants %>% add_text(
data = mds_df_stimulants_and_depressants,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
text = ~labels,
textposition = "bottom center",
showlegend = FALSE,
color = I("black")
)
p_stimulants_and_depressants <- p_stimulants_and_depressants %>% layout(
scene = list(
xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold")),
yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold")),
zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold")),
aspectmode = "cube"
),
title = paste("3D MDS - Stimulants and Depressants \n (Stress =", stress_val_stimulants_and_depressants, ")"),
legend = list(
traceorder = "normal",
itemsizing = "constant",
font = list(size = 12)
)
)
# Display the 3D plot
p_stimulants_and_depressants
#### Display the plots in order (2D first, then 3D)
print(p2_sd_mdma_mj)
## Perform 3D Multidimensional Scaling using dt_set_stimulants_and_depressants_with_mdma_mj
model3d_stimulants_and_depressants_with_mdma_mj <- mds(dt_set_stimulants_and_depressants_with_mdma_mj, ndim = 3, type = "ordinal", init = "torgerson")
# Calculate and display stress value
stress_val_stimulants_and_depressants_with_mdma_mj <- round(model3d_stimulants_and_depressants_with_mdma_mj$stress, 4)
stress_val_stimulants_and_depressants_with_mdma_mj
## [1] 0.0498
### Create an interactive 3D scatter plot with stress-wise sizing of individual points
sizes_o_stimulants_and_depressants_with_mdma_mj <- model3d_stimulants_and_depressants_with_mdma_mj$spp
# Apply square-root transformation to point sizes
sizes_o_stimulants_and_depressants_with_mdma_mj <- sqrt(sizes_o_stimulants_and_depressants_with_mdma_mj)
# Normalize point sizes for better visibility
min_range <- 6
max_range <- 16
sizes_o_stimulants_and_depressants_with_mdma_mj <- ((sizes_o_stimulants_and_depressants_with_mdma_mj - min(sizes_o_stimulants_and_depressants_with_mdma_mj)) /
(max(sizes_o_stimulants_and_depressants_with_mdma_mj) - min(sizes_o_stimulants_and_depressants_with_mdma_mj))) *
(max_range - min_range) + min_range
# Create data frame with dimensions and sizes
labels_stimulants_and_depressants_with_mdma_mj <- rownames(model3d_stimulants_and_depressants_with_mdma_mj$conf)
# Ensure that the sizes_o is properly matched with the correct labels
sizes_o_stimulants_and_depressants_with_mdma_mj <- sizes_o_stimulants_and_depressants_with_mdma_mj[match(labels_stimulants_and_depressants_with_mdma_mj, labels_stimulants_and_depressants_with_mdma_mj)]
# Now create the plot using plotly
mds_df_stimulants_and_depressants_with_mdma_mj <- data.frame(
Dim1 = model3d_stimulants_and_depressants_with_mdma_mj$conf[, 1],
Dim2 = model3d_stimulants_and_depressants_with_mdma_mj$conf[, 2],
Dim3 = model3d_stimulants_and_depressants_with_mdma_mj$conf[, 3],
labels = labels_stimulants_and_depressants_with_mdma_mj,
sizes = sizes_o_stimulants_and_depressants_with_mdma_mj,
labels_factor = factor(labels_stimulants_and_depressants_with_mdma_mj, levels = labels_stimulants_and_depressants_with_mdma_mj)
)
# Plot the 3D scatter plot using plotly
p_stimulants_and_depressants_with_mdma_mj <- plot_ly()
p_stimulants_and_depressants_with_mdma_mj <- p_stimulants_and_depressants_with_mdma_mj %>% add_trace(
data = mds_df_stimulants_and_depressants_with_mdma_mj,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_stimulants_and_depressants_with_mdma_mj], # Apply colors only to the subset
type = 'scatter3d',
mode = 'markers',
marker = list(size = ~sizes, sizemode = 'diameter', opacity = 0.3),
showlegend = FALSE
)
p_stimulants_and_depressants_with_mdma_mj <- p_stimulants_and_depressants_with_mdma_mj %>% add_trace(
data = mds_df_stimulants_and_depressants_with_mdma_mj,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_stimulants_and_depressants_with_mdma_mj],
type = 'scatter3d',
mode = 'markers',
marker = list(size = 5, sizemode = 'diameter'),
name = ~labels_factor
)
p_stimulants_and_depressants_with_mdma_mj <- p_stimulants_and_depressants_with_mdma_mj %>% add_text(
data = mds_df_stimulants_and_depressants_with_mdma_mj,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
text = ~labels,
textposition = "bottom center",
showlegend = FALSE,
color = I("black")
)
p_stimulants_and_depressants_with_mdma_mj <- p_stimulants_and_depressants_with_mdma_mj %>% layout(
scene = list(
xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold")),
yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold")),
zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold")),
aspectmode = "cube"
),
title = paste("3D MDS - Stimulants and Depressants with MDMA & MJ \n (Stress =", stress_val_stimulants_and_depressants_with_mdma_mj, ")"),
legend = list(
traceorder = "normal",
itemsizing = "constant",
font = list(size = 12)
)
)
# Display the 3D plot
p_stimulants_and_depressants_with_mdma_mj
#### Display the plots in order (2D first, then 3D)
print(p3_pd)
## Perform 3D Multidimensional Scaling using dt_set_psychedelics_and_dissociatives
model3d_psychedelics_and_dissociatives <- mds(dt_set_psychedelics_and_dissociatives, ndim = 3, type = "ordinal", init = "torgerson")
# Calculate and display stress value
stress_val_psychedelics_and_dissociatives <- round(model3d_psychedelics_and_dissociatives$stress, 4)
stress_val_psychedelics_and_dissociatives
## [1] 0.0024
### Create an interactive 3D scatter plot with stress-wise sizing of individual points
sizes_o_psychedelics_and_dissociatives <- model3d_psychedelics_and_dissociatives$spp
# Apply square-root transformation to point sizes
sizes_o_psychedelics_and_dissociatives <- sqrt(sizes_o_psychedelics_and_dissociatives)
# Normalize point sizes for better visibility
min_range <- 6
max_range <- 16
sizes_o_psychedelics_and_dissociatives <- ((sizes_o_psychedelics_and_dissociatives - min(sizes_o_psychedelics_and_dissociatives)) /
(max(sizes_o_psychedelics_and_dissociatives) - min(sizes_o_psychedelics_and_dissociatives))) *
(max_range - min_range) + min_range
# Create data frame with dimensions and sizes
labels_psychedelics_and_dissociatives <- rownames(model3d_psychedelics_and_dissociatives$conf)
# Ensure that the sizes_o is properly matched with the correct labels
sizes_o_psychedelics_and_dissociatives <- sizes_o_psychedelics_and_dissociatives[match(labels_psychedelics_and_dissociatives, labels_psychedelics_and_dissociatives)]
# Now create the plot using plotly
mds_df_psychedelics_and_dissociatives <- data.frame(
Dim1 = model3d_psychedelics_and_dissociatives$conf[, 1],
Dim2 = model3d_psychedelics_and_dissociatives$conf[, 2],
Dim3 = model3d_psychedelics_and_dissociatives$conf[, 3],
labels = labels_psychedelics_and_dissociatives,
sizes = sizes_o_psychedelics_and_dissociatives,
labels_factor = factor(labels_psychedelics_and_dissociatives, levels = labels_psychedelics_and_dissociatives)
)
# Plot the 3D scatter plot using plotly
p_psychedelics_and_dissociatives <- plot_ly()
p_psychedelics_and_dissociatives <- p_psychedelics_and_dissociatives %>% add_trace(
data = mds_df_psychedelics_and_dissociatives,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_psychedelics_and_dissociatives], # Apply colors only to the subset
type = 'scatter3d',
mode = 'markers',
marker = list(size = ~sizes, sizemode = 'diameter', opacity = 0.3),
showlegend = FALSE
)
p_psychedelics_and_dissociatives <- p_psychedelics_and_dissociatives %>% add_trace(
data = mds_df_psychedelics_and_dissociatives,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_psychedelics_and_dissociatives],
type = 'scatter3d',
mode = 'markers',
marker = list(size = 5, sizemode = 'diameter'),
name = ~labels_factor
)
p_psychedelics_and_dissociatives <- p_psychedelics_and_dissociatives %>% add_text(
data = mds_df_psychedelics_and_dissociatives,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
text = ~labels,
textposition = "bottom center",
showlegend = FALSE,
color = I("black")
)
p_psychedelics_and_dissociatives <- p_psychedelics_and_dissociatives %>% layout(
scene = list(
xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold")),
yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold")),
zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold")),
aspectmode = "cube"
),
title = paste("3D MDS - Psychedelics and Dissociatives (Stress =", stress_val_psychedelics_and_dissociatives, ")"),
legend = list(
traceorder = "normal",
itemsizing = "constant",
font = list(size = 12)
)
)
# Display the 3D plot
p_psychedelics_and_dissociatives
#### Display the plots in order (2D first, then 3D)
print(p4_pd_mdma_mj)
## Perform 3D Multidimensional Scaling using dt_set_psychedelics_and_dissociatives_with_mdma_mj
model3d_psychedelics_and_dissociatives_with_mdma_mj <- mds(dt_set_psychedelics_and_dissociatives_with_mdma_mj, ndim = 3, type = "ordinal", init = "torgerson")
# Calculate and display stress value
stress_val_psychedelics_and_dissociatives_with_mdma_mj <- round(model3d_psychedelics_and_dissociatives_with_mdma_mj$stress, 4)
stress_val_psychedelics_and_dissociatives_with_mdma_mj
## [1] 0.0456
### Create an interactive 3D scatter plot with stress-wise sizing of individual points
sizes_o_psychedelics_and_dissociatives_with_mdma_mj <- model3d_psychedelics_and_dissociatives_with_mdma_mj$spp
# Apply square-root transformation to point sizes
sizes_o_psychedelics_and_dissociatives_with_mdma_mj <- sqrt(sizes_o_psychedelics_and_dissociatives_with_mdma_mj)
# Normalize point sizes for better visibility
min_range <- 6
max_range <- 16
sizes_o_psychedelics_and_dissociatives_with_mdma_mj <- ((sizes_o_psychedelics_and_dissociatives_with_mdma_mj - min(sizes_o_psychedelics_and_dissociatives_with_mdma_mj)) /
(max(sizes_o_psychedelics_and_dissociatives_with_mdma_mj) - min(sizes_o_psychedelics_and_dissociatives_with_mdma_mj))) *
(max_range - min_range) + min_range
# Create data frame with dimensions and sizes
labels_psychedelics_and_dissociatives_with_mdma_mj <- rownames(model3d_psychedelics_and_dissociatives_with_mdma_mj$conf)
# Ensure that the sizes_o is properly matched with the correct labels
sizes_o_psychedelics_and_dissociatives_with_mdma_mj <- sizes_o_psychedelics_and_dissociatives_with_mdma_mj[match(labels_psychedelics_and_dissociatives_with_mdma_mj, labels_psychedelics_and_dissociatives_with_mdma_mj)]
# Now create the plot using plotly
mds_df_psychedelics_and_dissociatives_with_mdma_mj <- data.frame(
Dim1 = model3d_psychedelics_and_dissociatives_with_mdma_mj$conf[, 1],
Dim2 = model3d_psychedelics_and_dissociatives_with_mdma_mj$conf[, 2],
Dim3 = model3d_psychedelics_and_dissociatives_with_mdma_mj$conf[, 3],
labels = labels_psychedelics_and_dissociatives_with_mdma_mj,
sizes = sizes_o_psychedelics_and_dissociatives_with_mdma_mj,
labels_factor = factor(labels_psychedelics_and_dissociatives_with_mdma_mj, levels = labels_psychedelics_and_dissociatives_with_mdma_mj)
)
# Plot the 3D scatter plot using plotly
p_psychedelics_and_dissociatives_with_mdma_mj <- plot_ly()
p_psychedelics_and_dissociatives_with_mdma_mj <- p_psychedelics_and_dissociatives_with_mdma_mj %>% add_trace(
data = mds_df_psychedelics_and_dissociatives_with_mdma_mj,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_psychedelics_and_dissociatives_with_mdma_mj], # Apply colors only to the subset
type = 'scatter3d',
mode = 'markers',
marker = list(size = ~sizes, sizemode = 'diameter', opacity = 0.3),
showlegend = FALSE
)
p_psychedelics_and_dissociatives_with_mdma_mj <- p_psychedelics_and_dissociatives_with_mdma_mj %>% add_trace(
data = mds_df_psychedelics_and_dissociatives_with_mdma_mj,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_psychedelics_and_dissociatives_with_mdma_mj],
type = 'scatter3d',
mode = 'markers',
marker = list(size = 5, sizemode = 'diameter'),
name = ~labels_factor
)
p_psychedelics_and_dissociatives_with_mdma_mj <- p_psychedelics_and_dissociatives_with_mdma_mj %>% add_text(
data = mds_df_psychedelics_and_dissociatives_with_mdma_mj,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
text = ~labels,
textposition = "bottom center",
showlegend = FALSE,
color = I("black")
)
p_psychedelics_and_dissociatives_with_mdma_mj <- p_psychedelics_and_dissociatives_with_mdma_mj %>% layout(
scene = list(
xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold")),
yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold")),
zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold")),
aspectmode = "cube"
),
title = paste("3D MDS - Psychedelics and Dissociatives with MDMA & MJ \n (Stress =", stress_val_psychedelics_and_dissociatives_with_mdma_mj, ")"),
legend = list(
traceorder = "normal",
itemsizing = "constant",
font = list(size = 12)
)
)
# Display the 3D plot
p_psychedelics_and_dissociatives_with_mdma_mj
#### Display the plots in order (2D first, then 3D)
print(p5_pd_mdma_mj_baseline)
## Perform 3D Multidimensional Scaling using dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline
model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline <- mds(dt_set_psychedelics_and_dissociatives_with_mdma_mj_baseline, ndim = 3, type = "ordinal", init = "torgerson")
# Calculate and display stress value
stress_val_psychedelics_and_dissociatives_with_mdma_mj_baseline <- round(model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline$stress, 4)
stress_val_psychedelics_and_dissociatives_with_mdma_mj_baseline
## [1] 0.0457
### Create an interactive 3D scatter plot with stress-wise sizing of individual points
sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline <- model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline$spp
# Apply square-root transformation to point sizes
sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline <- sqrt(sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline)
# Normalize point sizes for better visibility
min_range <- 6
max_range <- 16
sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline <- ((sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline - min(sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline)) /
(max(sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline) - min(sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline))) *
(max_range - min_range) + min_range
# Create data frame with dimensions and sizes
labels_psychedelics_and_dissociatives_with_mdma_mj_baseline <- rownames(model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline$conf)
# Ensure that the sizes_o is properly matched with the correct labels
sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline <- sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline[match(labels_psychedelics_and_dissociatives_with_mdma_mj_baseline, labels_psychedelics_and_dissociatives_with_mdma_mj_baseline)]
# Now create the plot using plotly
mds_df_psychedelics_and_dissociatives_with_mdma_mj_baseline <- data.frame(
Dim1 = model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline$conf[, 1],
Dim2 = model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline$conf[, 2],
Dim3 = model3d_psychedelics_and_dissociatives_with_mdma_mj_baseline$conf[, 3],
labels = labels_psychedelics_and_dissociatives_with_mdma_mj_baseline,
sizes = sizes_o_psychedelics_and_dissociatives_with_mdma_mj_baseline,
labels_factor = factor(labels_psychedelics_and_dissociatives_with_mdma_mj_baseline, levels = labels_psychedelics_and_dissociatives_with_mdma_mj_baseline)
)
# Plot the 3D scatter plot using plotly
p_psychedelics_and_dissociatives_with_mdma_mj_baseline <- plot_ly()
p_psychedelics_and_dissociatives_with_mdma_mj_baseline <- p_psychedelics_and_dissociatives_with_mdma_mj_baseline %>% add_trace(
data = mds_df_psychedelics_and_dissociatives_with_mdma_mj_baseline,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_psychedelics_and_dissociatives_with_mdma_mj_baseline], # Apply colors only to the subset
type = 'scatter3d',
mode = 'markers',
marker = list(size = ~sizes, sizemode = 'diameter', opacity = 0.3),
showlegend = FALSE
)
p_psychedelics_and_dissociatives_with_mdma_mj_baseline <- p_psychedelics_and_dissociatives_with_mdma_mj_baseline %>% add_trace(
data = mds_df_psychedelics_and_dissociatives_with_mdma_mj_baseline,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
color = ~labels_factor,
colors = point_col[labels_psychedelics_and_dissociatives_with_mdma_mj_baseline],
type = 'scatter3d',
mode = 'markers',
marker = list(size = 5, sizemode = 'diameter'),
name = ~labels_factor
)
p_psychedelics_and_dissociatives_with_mdma_mj_baseline <- p_psychedelics_and_dissociatives_with_mdma_mj_baseline %>% add_text(
data = mds_df_psychedelics_and_dissociatives_with_mdma_mj_baseline,
x = ~Dim1, y = ~Dim2, z = ~Dim3,
text = ~labels,
textposition = "bottom center",
showlegend = FALSE,
color = I("black")
)
p_psychedelics_and_dissociatives_with_mdma_mj_baseline <- p_psychedelics_and_dissociatives_with_mdma_mj_baseline %>% layout(
scene = list(
xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold")),
yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold")),
zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold")),
aspectmode = "cube"
),
title = paste("3D MDS - Psychedelics and Dissociatives with MDMA, MJ & Baseline \n (Stress =", stress_val_psychedelics_and_dissociatives_with_mdma_mj_baseline, ")"),
legend = list(
traceorder = "normal",
itemsizing = "constant",
font = list(size = 12)
)
)
# Display the 3D plot
p_psychedelics_and_dissociatives_with_mdma_mj_baseline
10. Stress values as a function of compared states
calculate_stress_values <- function(df) {
max_dims <- min(10, nrow(df) - 1) # Max number of dimensions is number of rows - 1
stress_values <- c()
for (dim in 1:max_dims) {
model <- mds(df, ndim = dim, type = "ordinal", init = "torgerson")
stress_values <- c(stress_values, round(model$stress, 4))
}
# If fewer than 10 dimensions were calculated, fill remaining with NA
if (max_dims < 10) {
stress_values <- c(stress_values, rep(NA, 10 - max_dims))
}
return(stress_values)
}
subject_stats <- data.frame(Subject = integer(), Num_Rows = integer(), Stress_1D = numeric(),
Stress_2D = numeric(), Stress_3D = numeric(), Stress_4D = numeric(),
Stress_5D = numeric(), Stress_6D = numeric(), Stress_7D = numeric(),
Stress_8D = numeric(), Stress_9D = numeric(), Stress_10D = numeric())
# Loop over all subjects
for (i in 1:739) { # Adjust the range based on the actual number of subjects
df_name <- paste0("df_", i)
# Check if the dataframe for the subject exists
if (exists(df_name)) {
df <- get(df_name) # Get the dataframe
# Only process if there are at least 3 rows (minimum for 2D MDS)
if (nrow(df) >= 3) {
stress_values <- calculate_stress_values(df)
subject_stats <- rbind(subject_stats, data.frame(Subject = i, Num_Rows = nrow(df),
Stress_1D = stress_values[1],
Stress_2D = stress_values[2],
Stress_3D = stress_values[3],
Stress_4D = stress_values[4],
Stress_5D = stress_values[5],
Stress_6D = stress_values[6],
Stress_7D = stress_values[7],
Stress_8D = stress_values[8],
Stress_9D = stress_values[9],
Stress_10D = stress_values[10]))
} else {
subject_stats <- rbind(subject_stats, data.frame(Subject = i, Num_Rows = nrow(df),
Stress_1D = NA, Stress_2D = NA,
Stress_3D = NA, Stress_4D = NA,
Stress_5D = NA, Stress_6D = NA,
Stress_7D = NA, Stress_8D = NA,
Stress_9D = NA, Stress_10D = NA))
}
}
}
## View the subject_stats dataframe (Optional)
#print(subject_stats)
### General plot
# Function to plot individual lines for each subject with a black line for the overall average
plot_individual_stress_with_avg <- function(subject_stats, max_dims = 3) {
# Adjust max_dims to be within the available dimensions (1-10)
max_dims <- min(max_dims, 10)
# Prepare the column names for stress dimensions based on max_dims
stress_columns <- paste0("Stress_", 1:max_dims, "D")
# Reshape the data into a long format for plotting, only up to max_dims
subject_stats_melted <- melt(subject_stats, id.vars = c("Subject", "Num_Rows"),
measure.vars = stress_columns,
variable.name = "Dimension", value.name = "Stress")
# Convert Dimension column to numeric, removing 'Stress_' and 'D'
subject_stats_melted$Dimension <- as.numeric(gsub("Stress_|D", "", subject_stats_melted$Dimension))
# Calculate the average stress for each dimension, skipping NAs
avg_stress <- subject_stats_melted %>%
group_by(Dimension) %>%
summarize(Avg_Stress = mean(Stress, na.rm = TRUE))
# Plot individual lines for each subject, skipping NAs
ggplot() +
# Individual subject lines, semi-transparent
geom_line(data = subject_stats_melted, aes(x = Dimension, y = Stress, group = Subject, color = as.factor(Subject)),
alpha = 0.3, size = 0.8, na.rm = TRUE) +
geom_point(data = subject_stats_melted, aes(x = Dimension, y = Stress, group = Subject, color = as.factor(Subject)),
size = 2, alpha = 0.3, na.rm = TRUE) +
# Overall average line in black, thicker
geom_line(data = avg_stress, aes(x = Dimension, y = Avg_Stress),
color = "black", size = 1.5, na.rm = TRUE) +
geom_point(data = avg_stress, aes(x = Dimension, y = Avg_Stress),
color = "black", size = 3, na.rm = TRUE) +
labs(
title = "Stress Values Across Models from Individual Subjects",
x = "Number of Dimensions",
y = "Stress"
) +
theme_minimal() +
scale_x_continuous(breaks = 1:max_dims) + # Ensure integer values on x-axis
scale_color_viridis_d(option = "plasma") + # Use a color palette for subjects
theme(
text = element_text(size = 14),
legend.position = "none" # Hide legend for clarity, adjust as needed
)
}
# Example usage:
plot_individual_stress_with_avg(subject_stats, max_dims = 6)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
### PLOT with lines colored according to the number of compared states
# Function to plot individual lines for each subject with a black line for the overall average
# Color by the number of non-NA stress values (number of dimensions available per subject)
# Ensure that lines only plot up to the number of available dimensions (no NAs beyond that)
plot_individual_stress_with_avg <- function(subject_stats, max_dims = 10) {
# Adjust max_dims to be within the available dimensions (1-10)
max_dims <- min(max_dims, 10)
# Prepare the column names for stress dimensions based on max_dims
stress_columns <- paste0("Stress_", 1:max_dims, "D")
# Reshape the data into a long format for plotting, only up to max_dims
subject_stats_melted <- melt(subject_stats, id.vars = c("Subject", "Num_Rows"),
measure.vars = stress_columns,
variable.name = "Dimension", value.name = "Stress")
# Convert Dimension column to numeric, removing 'Stress_' and 'D'
subject_stats_melted$Dimension <- as.numeric(gsub("Stress_|D", "", subject_stats_melted$Dimension))
# Filter out rows where Stress is NA (this ensures shorter lines for subjects with fewer valid dimensions)
subject_stats_melted <- subject_stats_melted %>% filter(!is.na(Stress))
# Count the number of valid dimensions for each subject (used for color coding)
subject_stats_melted <- subject_stats_melted %>%
group_by(Subject) %>%
mutate(Non_NA_Dims = n())
# Calculate the average stress for each dimension, skipping NAs
avg_stress <- subject_stats_melted %>%
group_by(Dimension) %>%
summarize(Avg_Stress = mean(Stress, na.rm = TRUE))
# Plot individual lines for each subject, colored by the number of non-NA dimensions
ggplot() +
# Individual subject lines, more transparent with lower alpha
geom_line(data = subject_stats_melted, aes(x = Dimension, y = Stress, group = Subject, color = Non_NA_Dims),
alpha = 0.3, size = 0.8, na.rm = TRUE) + # Lower alpha for more transparency
geom_point(data = subject_stats_melted, aes(x = Dimension, y = Stress, group = Subject, color = Non_NA_Dims),
size = 2, alpha = 0.3, na.rm = TRUE) + # Lower alpha for points as well
# Overall average line in black, thicker
geom_line(data = avg_stress, aes(x = Dimension, y = Avg_Stress),
color = "black", size = 1.5, na.rm = TRUE) +
geom_point(data = avg_stress, aes(x = Dimension, y = Avg_Stress),
color = "black", size = 3, na.rm = TRUE) +
labs(
title = "Stress Values Across Models from Individual Subjects",
x = "Number of Dimensions",
y = "Stress"
) +
scale_color_viridis_c(option = "viridis", name = "Available Dimensions \n (Compared States - 1)") + # Use color to represent the number of dimensions
scale_x_continuous(breaks = 1:max_dims) + # Ensure integer values on x-axis
theme_minimal() +
theme(
text = element_text(size = 14),
legend.position = "right" # Show legend to denote number of dimensions
)
}
# Note that subjects with more than 11 compared states are represented as having at least 10 available dimensions
# Example usage:
# Plot the stress values with 10 dimensions
plot_individual_stress_with_avg(subject_stats, max_dims = 10)